Code
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(plotly))
# Read the "pooled123" sheet and keep only the columns used below:
# participant ID, SSQ items 1-15 for both BSSQ and ASSQ, age, VR experience
# and ssq_modelled.
data <- read_excel("Project_1_Data.xlsx", sheet = "pooled123")
filteredData <- data %>%
  select(PID, BSSQ_1:BSSQ_15, ASSQ_1:ASSQ_15, age, VRexperience, ssq_modelled)

# Subsets by reported VR experience. These are taken here, before any derived
# columns are added to filteredData, so they hold the selected columns only.
withVRexperience <- filteredData %>% filter(VRexperience == "Yes")
noVRexperience <- filteredData %>% filter(VRexperience == "No")
# Per-symptom change in SSQ score: active (ASSQ) minus baseline (BSSQ).
# A positive value means the symptom was rated higher in the active SSQ.
# The new d_* columns are appended in item order (1 to 15).
filteredData <- filteredData %>%
  mutate(
    d_discomfort = ASSQ_1 - BSSQ_1,
    d_fatigue = ASSQ_2 - BSSQ_2,
    d_headache = ASSQ_3 - BSSQ_3,
    d_eyestrain = ASSQ_4 - BSSQ_4,
    d_difficulty_focusing = ASSQ_5 - BSSQ_5,
    d_salivation = ASSQ_6 - BSSQ_6,
    d_sweating = ASSQ_7 - BSSQ_7,
    d_nausea = ASSQ_8 - BSSQ_8,
    d_difficulty_concentrating = ASSQ_9 - BSSQ_9,
    d_fullness_of_head = ASSQ_10 - BSSQ_10,
    d_blurred_vision = ASSQ_11 - BSSQ_11,
    d_dizziness_o = ASSQ_12 - BSSQ_12,
    d_dizziness_c = ASSQ_13 - BSSQ_13,
    d_vertigo = ASSQ_14 - BSSQ_14,
    d_stomach_awareness = ASSQ_15 - BSSQ_15
  )
# Reclass VR experience as a factor (it is read in as chr), and split
# participants into age groups so that the mean change per symptom can later be
# calculated for each group.
#
# The bins use half-open thresholds (age < 22, age < 30, ...) rather than
# closed integer ranges, so a non-integer age such as 21.5 lands in
# "16 to 21" instead of falling into a gap between bins and becoming NA.
# Whole-number ages are grouped exactly as before. Ages below 16 and missing
# ages are left as NA (no group is defined for them).
filteredData <- filteredData %>%
  mutate(
    VRexperience = as.factor(VRexperience),
    age_group = as.factor(case_when(
      age < 16 ~ NA_character_,
      age < 22 ~ "16 to 21",
      age < 30 ~ "22 to 29",
      age < 38 ~ "30 to 37",
      age <= 45 ~ "38 to 45",
      age > 45 ~ "above 45"
    ))
  )
# Give the SSQ item columns more informative names: BSSQ_<n> / ASSQ_<n>
# become BSSQ_<symptom> / ASSQ_<symptom>. Any of these columns that is not
# present is simply skipped. Done inside local() so the helper vectors do not
# end up in the global environment.
filteredData <- local({
  symptoms <- c(
    "discomfort", "fatigue", "headache", "eyestrain", "difficulty_focusing",
    "salivation", "sweating", "nausea", "difficulty_concentrating",
    "fullness_of_head", "blurred_vision", "dizziness_o", "dizziness_c",
    "vertigo", "stomach_awareness"
  )
  item_no <- seq_along(symptoms)
  old_names <- c(paste0("BSSQ_", item_no), paste0("ASSQ_", item_no))
  new_names <- c(paste0("BSSQ_", symptoms), paste0("ASSQ_", symptoms))

  renamed <- filteredData
  # For every existing column, the index of its numbered name (NA if none).
  position <- match(names(renamed), old_names)
  found <- !is.na(position)
  names(renamed)[found] <- new_names[position[found]]
  renamed
})
Initial Data Analysis (IDA)
Source
Our data was sourced from Cosette Saunder’s PhD and honours thesis paper “Socially Acquired Nocebo Effects Generalize but Are Not Attenuated by Choice”. (ask about how much context we need to provide here)
Structure
The data contained 336 records of participants in the study, each with 51 variables. In particular, our research focused on the following variables:
Baseline SSQ of 16 symptoms (quantitative, discrete): self-reported symptom severity of participants before undergoing VR, on a scale of 1 to 10.
Active SSQ of 16 symptoms (quantitative, discrete): self-reported symptom severity of participants after undergoing VR, on a scale of 1 to 10.
The age of the participants (quantitative, discrete); they were then sorted into age groups – re-classed as ‘factor’ (qualitative, ordinal)
- This was to allow the relationship between age groups and symptoms reported to be seen
Whether the participant has had previous VR experience (qualitative, nominal); this was reclassified from ‘character’ into ‘factor’.
- R read this variable in as ‘chr’ (character); since it is a qualitative variable, it was converted to a factor
The change between baseline and active SSQ (active minus baseline) was calculated for each participant and each symptom (quantitative, discrete).
Code
library(RColorBrewer)
# Pie chart of the number of participants in each VRexperience category.
data_experience <- filteredData %>% select(VRexperience)
exp_counted <- count(data_experience, VRexperience)
vr_pie <- exp_counted %>%
  plot_ly(labels = ~VRexperience, values = ~n, type = "pie") %>%
  layout(title = "Distribution of VR experience", showlegend = TRUE)
vr_pie
Code
# Pie chart of the number of participants in each age group.
data_age_groups <- filteredData %>% select(age_group)
groups_counted <- count(data_age_groups, age_group)
age_pie <- groups_counted %>%
  plot_ly(labels = ~age_group, values = ~n, type = "pie") %>%
  layout(title = "Distribution of ages", showlegend = TRUE)
age_pie
Limitations
Assumptions